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c PARALLEL ALGORITHM OF INVERSE MATRIX 

subroutine inversion(a, k, n)

shared array a(k, n), ip(n) 

create threads 

set nothrd and numthrd 
c nothrd IS THREAD NUMBER 1 -#TH. numthrd=#TH (TOTAL NUMBER OF THREADS) 

iblk=BLOCK WIDTH 

nb=(n+blk-1)/blk 

do i=1, nb-1 

nbase=(i-1)*blk

call LU(a, k, n, nbase, blk, ip, nothrd, numthrd)

! LU DECOMPOSITION IS PERFORMED ON SPECIFIED BLOCK. ROW BLOCK IS
! UPDATED.

! OPERATIONS FOR UPDATING LOWER RIGHT SQUARE BLOCK MATRIX ARE PERFORMED
! IN PARALLEL. ROW INTERCHANGING INFORMATION IS RETURNED TO
! ip(nbase+1:nbase+blk).
! FOR PARALLEL ALGORITHM OF THIS PORTION, REFER TO JAPANESE PATENT
! APPLICATION NO. HEI-12-358232
do i=nbase+1, nbase+blk
if (ip(i)>i)then

exchange(a(i, 1:nbase), a(ip(i), 1:nbase))

end if 

enddo 

call update(a, k, n, nbase, blk, nothrd, numthrd)
enddo 

nbase=(nb-1)*blk
blklast=n-nbase

call LU(a, k, n, nbase, blklast, nothrd, numthrd)
call update(a, k, n, nbase, blklast, nothrd, numthrd)
return 
end 
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c ROUTINE OF UPDATING REMAINING PORTION ACCORDING TO INFORMATION 

ABOUT BLOCK LU DECOMPOSITION 
subroutine update(a, k, n, nbase, blk, nothrd, numthrd)
call b-update(a, k, n, nbase, blk, nothrd, numthrd)
call d-update(a, k, n, nbase, blk, nothrd, numthrd)
call c-update(a, k, n, nbase, blk, nothrd, numthrd)
call a-update(a, k, n, nbase, blk, nothrd, numthrd)
call g-update(a, k, n, nbase, blk, nothrd, numthrd)
BARRIER SYNC 
if (nothread=1)then
c UPDATE 1 OF e

call e-update1(a(nbase+1, nbase+1), k, n, blk)
end if

BARRIER SYNC 

len←(nbase+numthrd-1)/numthrd
is←(nothrd-1)*len+1
ie←nothrd*len

call df-update(a(nbase+1, 1), k, n, a(nbase+1, nbase+1), is, ie, blk)

nbase2←nbase+blk
len←(n-nbase2+numthrd-1)/numthrd
is2←nbase2+(nothrd-1)*len+1
ie2←nbase2+nothrd*len

call df-update(a(nbase+1, 1), k, n, a(nbase+1, nbase+1), is, ie, blk)
BARRIER SYNC 
if (nothread=1)then 
c UPDATE 2 OF e 

call e-update2(a(nbase+1, nbase+1), k, n, blk)
end if 

BARRIER SYNC 

len←(nbase+numthrd-1)/numthrd
is←(nothrd-1)*len+1
ie←nothrd*len

call bh-update(a(1, nbase+1), k, n, a(nbase+1, nbase+1), is, ie, blk)

nbase2←nbase+blk

len←(n-nbase2+numthrd-1)/numthrd

is2←nbase2+(nothrd-1)*len+1

ie2←nbase2+nothrd*len

call bh-update(a(1, nbase+1), k, n, a(nbase+1, nbase+1), is, ie, blk)
BARRIER SYNC 
if (nothread=1) then
c UPDATE 3 OF e

call e-update3(a(nbase+1, nbase+1), k, n, blk)
end if 

BARRIER SYNC 

len=(n+numthrd-1)/numthrd
is=(nothrd-1)*len+1
ie=min(n, nothrd*len)
if (ip(i)>i)then

exchange(a(is:ie, i), a(is:ie, ip(i)))

end if 

enddo 

BARRIER SYNC 

eliminate threads



return 
end 
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UPDATE OF BLOCK B 

subroutine b-update(a, k, n, nbase, blk, nothrd, numthrd)

shared array a(k, n)

len←(nbase+numthrd-1)/numthrd

is1←(nothrd-1)*len

ie1←nothrd*len

a(is:ie, iof+1:iof+blk)

←a(is:ie, iof+1:iof+blk)*TRU-U(iof+1:iof+blk, iof+1:iof+blk)^(-1)

! TRU-U UPPER TRIANGULAR MATRIX WITH DIAGONAL ELEMENT=1.0 
return 
end 

UPDATE OF BLOCK D 

subroutine d-update(a, k, n, nbase, blk, nothrd, numthrd)

shared array a(k, n)

len←(nbase+numthrd-1)/numthrd

is1←(nothrd-1)*len

ie1←nothrd*len

iof=nbase

a(is:ie, iof+1:iof+blk)

←TRL(iof+1:iof+blk, iof+1:iof+blk)^(-1)*a(is:ie, iof+1:iof+blk)

! TRL INDICATES LOWER TRIANGULAR MATRIX 
return 
end 
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UPDATE OF BLOCK C 

subroutine c-update(a, k, n, nbase, blk, nothrd, numthrd)
shared array a(k, n)
nbase2←nbase+blk
iof=nbase

len←(n-nbase2+numthrd-1)/numthrd
is2←nbase2+(nothrd-1)*len+1
ie2←nbase2+nothrd*len
a(1:iof, is2:ie2)
←a(1:iof, is2:ie2)

-a(1:iof, iof+1:iof+blk)*a(iof+1:iof+blk, is2:ie2)

return 

end 

UPDATE OF BLOCK A 

subroutine a-update(a, k, n, nbase, blk, nothrd, numthrd)

shared array a(k, n)

len←(nbase+numthrd-1)/numthrd

is2←(nothrd-1)*len

ie2←nothrd*len

iof=nbase

a(1:iof, is2:ie2)

←a(1:iof, is2:ie2)

-a(1:iof, iof+1:iof+blk)*a(iof+1:iof+blk, is2:ie2)

return 

end 



FIG. 12 



PARALLEL PROCESSING METHOD ... Oct. 24, 2003

Makoto Nakanishi

Greer, Burns & Crain, Ltd. (Patrick Burns)
Ref. No. 1503.68591

Sheet 13 of 29 (312) 360 0080



UPDATE OF BLOCK G 

subroutine g-update(a, k, n, nbase, blk, nothrd, numthrd)

shared array a(k, n)

len←(nbase+numthrd-1)/numthrd

is2←(nothrd-1)*len

ie2←nothrd*len

iof←nbase+blk

a(iof+1:n, is2:ie2)

←a(iof+1:n, is2:ie2)
-a(iof+1:n, iof+1:iof+blk)*a(iof+1:iof+blk, is2:ie2)
return 
end 

FIRST UPDATE OF BLOCK E 
subroutine e-update1(s, k, n, blk)
shared array s(k, n)

do i=1, blk

s(1:i-1, i+1:blk)←s(1:i-1, i+1:blk)-s(1:i-1, i)*s(i, i+1:blk)

enddo 

return 

end 

SECOND UPDATE OF BLOCK E 
subroutine e-update2(s, k, n, blk)
shared array s(k, n)

do i=1, blk

tmp←1.0/s(i, i)

s(i, 1:i-1)←tmp*s(i, 1:i-1)

s(i+1:blk, 1:i-1)←s(i+1:blk, 1:i-1)-s(i, 1:i-1)*s(i+1:blk, i)

s(i+1:blk, i)←-s(i+1:blk, i)*tmp

s(i, i)←tmp

enddo 

return 

end 
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c FINAL UPDATE OF BLOCK E 

subroutine e-update3(s, k, n, blk)
shared array s(k, n)

do i=1, blk

s(1:i-1, 1:i-1)←s(1:i-1, 1:i-1)-s(1:i-1, i)*s(i, 1:i-1)

s(1:i-1, i)←s(1:i-1, i)*a(i, i)

enddo 

return 

end 

c UPDATE OF BLOCKS D AND F 

subroutine df-update(a, k, n, s, is, ie, len)
shared array a(k, *), s(k, *)
if (len<10)then
do i=1, len

a(1:i-1, is:ie)←a(1:i-1, is:ie)-s(1:i-1, i)*a(i, is:ie)

enddo

else

if (len>=32 or len<=20)then

len1←len/2

len2←len-len1

else

len1←len/3
len2←len-len1

end if

call df-update(a, k, n, s, is, ie, len1)
a(1:len1, is:ie)

←a(1:len1, is:ie)-s(1:len1, len1+1:len)*a(len1+1:len, is:ie)

call df-update(a(len1+1, 1), k, n, s(len1+1, len1+1), is, ie, len2)

end if 

return 

end 
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c UPDATE OF BLOCKS B AND H 

subroutine bh-update(a, k, n, s, is, ie, len)
shared array a(k, *), s(k, *)
if (len<10)then
do i=1, len

a(is:ie, i)←-a(is:ie, i)*s(i, i)

a(is:ie, i)←a(is:ie, i)-a(is:ie, i+1:len)*s(i+1:len, i)
enddo

else

if (len>=32 or len<=20)then

len1←len/2

len2←len-len1

else

len1←len/3
len2←len-len1
end if

call bh-update(a, k, n, s, is, ie, len1)
a(is:ie, 1:len1)←a(is:ie, 1:len1)

-a(is:ie, len1+1:len)*s(len1+1:len, 1:len1)
call bh-update(a(1, len1+1), k, n, s(len1+1, len1+1), is, ie, len2)
end if 
return 
end 
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START OF INVERSE MATRIX 
INPUTTING shared ARRAY A(k, n) 
AS SUBROUTINE 



GENERATING THREAD 

SETTING TOTAL NUMBER OF THREADS FOR LOCAL 
AREA numthr AND THREAD NUMBER ASSIGNED TO 
EACH THREAD FOR nothrd IN EACH THREAD 
SETTING FOLLOWINGS IN EACH THREAD
SETTING BLOCK WIDTH FOR iblk
SETTING nb=(n+iblk-1)/iblk
SETTING i=1




S10 



SETTING 



(i-1)*iblk FOR
nbase 



SI 2 



nbase=(nb-1)*iblk



SI 7 



FOR 

A(nbase+1 :n, nbase+1 :nbase+n). 
BLOCK LU DECOMPOSITION IS 
PERFORMED TO UPDATE ROW BLOCK AND 
LOWER RIGHT SQUARE MATRIX, 
ip(nbase+1:nbase+iblk) CONTAINS
INFORMATION FOR EXCHANGE OF ROWS. 
WHICH IS SET AS SUBROUTINE. 
PARALLEL ALGORITHM OF 
THIS PORTION IS DESCRIBED 
IN PATENT APPLICATION 
FILED BEFORE. 



S13 



PERFORMING 
LU-DECOMPOSITION 
ON A(nbase+1:n,

nbase+1:n)
ip(nbase+1:n-1)
CONTAINS INFORMATION
ABOUT ROW EXCHANGE



CALLING 
SUBROUTINE 
exchgrow. AND 
EXCHANGING ROW 

VECTORS FOR 
A(nbase+1:n-1,
1:nbase)



SI 9 



CALLING 
SUBROUTINE exchgrow. 
AND EXCHANGING ROW VECTORS OF 
A(nbase+1:nbase+iblk, 1:nbase)
(EXCHANGING A(j, 1:nbase)

FOR A(ip(j), 1:nbase)
IF ip(j)>j IS SATISFIED 
WHEN j IS CHANGED 
FROM nbase+1 
TO nbase+iblk) 
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CALLING 
SUBROUTINE 
update TO 
.UPDATE ANOTHER, 
BLOCK 



i = i+1



CALLING 
SUBROUTINE \^ S20 
UPDATE TO 

UPDATE 



CALLING SUBROUTINE 
exchgcol FOR EXCHANGING
ROW VECTORS
EXCHANGING A(1:n, j) FOR
A(1:n, ip(j)) IF ip(j)>j
IS SATISFIED WHEN j IS 
CHANGED BY SEQUENTIALLY 
SUBTRACTING 1 FROM n 
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REMOVING THREAD GENERATED FOR 
PARALLEL PROCESSING 



SI 6 



FIG. 16 



PARALLEL PROCESSING METHOD ... Oct. 24, 2003

Makoto Nakanishi

Greer, Burns & Crain, Ltd. (Patrick Burns)
Ref. No. 1503.68591

Sheet 17 of 29 (312) 360 0080



SUBROUTINE update ^ 



ASSIGNING nbase,
iblk, numthrd WHICH IS
INFORMATION ABOUT ARRAY A 

AND THREAD, AND NOTHRD 
INDICATING NUMBER OF EACH 
THREAD WHEN SUBROUTINES 
b-update, d-update, 
c-update, a-update, 
AND g-update ARE CALLED 
CALLING SUBROUTINE 
b-update TO UPDATE 
BLOCK 



S30 




TAKING barrier SYNCHRONIZATION 
BETWEEN THREADS 



S35 




TAKING barrier SYNCHRONIZATION 
BETWEEN THREADS 



S38 



DETERMINING AND ASSIGNING STARTING 
POINT (is) AND ENDING POINT (ie) TO BE 
SHARED BY EACH THREAD BY df-update. 
len=(nbase+numthrd-1)/numthrd
is=(nothrd-1)*len+1,
ie=min(nbase, nothrd*len)
SETTING FIRST DIMENSION LEADER
istart=nbase+1 OF THE BLOCK AND
BLOCK WIDTH len=iblk



839 




FIG. 17 
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SIMILARLY ASSIGNING STARTING POINT 

AND ENDING POINT 

nbase2=nbase+iblk,

len=(n-nbase2+numthrd-1)/numthrd

is2=nbase2+(nothrd-1)*len+1,

ie2=min(n, nbase2+nothrd*len)



841 




SETTING FIRST 
DIMENSION LEADER 
istart=nbase+1 OF
THE BLOCK AND BLOCK

WIDTH len=iblk.
CALLING SUBROUTINE 
df-update TO UPDATE 
BLOCK f 



842 



TAKING barrier 
SYNCHRONIZATION BETWEEN THREADS 
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TAKING barrier SYNCHRONIZATION 
BETWEEN THREADS 



T 



846 
847 



COMPUTING STARTING POINT AND ENDING POINT 

ASSIGNED TO EACH THREAD OF bh-update. 

len=(nbase+numthrd-1)/numthrd,

is=(nothrd-1)*len+1,

ie=min(nbase, nothrd*len)

SETTING FIRST DIMENSION LEADER istart=nbase+1
OF THE BLOCK, AND BLOCK WIDTH len=iblk
CALLING SUBROUTINE bh-update TO UPDATE BLOCK b 



COMPUTING 
STARTING POINT 
^AND ENDING POINT 
I ASSIGNED TO EACH 
THREAD OF 
bh-update 



848 
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nbase2=nbase+ 1 b I k, I en= (n-nbase2+numthrd-l ) / 

numthrd, is2=nbase2+(nothrd-1)*len+1,

ie2=min(n, nbase2+nothrd*len)

SETTING FIRST DIMENSION LEADER istart=nbase+1

OF THE BLOCK AND BLOCK WIDTH len=iblk 




TAKING barrier 
SYNCHRONIZATION BETWEEN THREADS 



FIG. 18 
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c 



SUBROUTINE b-update 



3 



COMPUTING STARTING POINT AND ENDING POINT OF 

FIRST DIMENSION TO BE SHARED BY EACH THREAD 

len=(nbase+numthrd-1)/numthrd

is1=(nothrd-1)*len+1,

ie1=min(nbase, nothrd*len), iof=nbase



a(is1:ie1, iof+1:iof+iblk) =

a(is1:ie1, iof+1:iof+iblk)

*TRU-U(a(iof+1:iof+iblk, iof+1:iof+iblk))^(-1)

TRU-U REFERS TO UPPER TRIANGULAR MATRIX 

HAVING DIAGONAL ELEMENT OF 1.0 



c 



return 



c 



SUBROUTINE d-update 



3 



COMPUTING STARTING POINT AND ENDING POINT 

SHARED BY EACH THREAD 

len=(nbase+numthrd-1)/numthrd

is1=(nothrd-1)*len+1,

ie1=min(nbase, nothrd*len), iof=nbase



a(iof+1:iof+iblk, is1:ie1)=

TRL(a(iof+1:iof+iblk, iof+1:iof+iblk))^(-1)*

a(iof+1:iof+iblk, is1:ie1)

TRL REFERS TO LOWER TRIANGULAR MATRIX OF 

SQUARE MATRIX 



c 



return 



3 



FIG. 19 
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c 



SUBROUTINE c-update 



COMPUTING STARTING POINT AND ENDING POINT OF SECOND 

DIMENSION TO BE SHARED BY EACH THREAD 

nbase2=nbase+iblk,

len=(n-nbase2+numthrd-1)/numthrd

is2=nbase2+(nothrd-1)*len+1,

ie2=min(n, nbase2+nothrd*len), iof=nbase



a(1:iof, is2:ie2) =

a(1:iof, iof+1:iof+iblk)*a(iof+1:iof+iblk, is2:ie2)



c 



return 



3 
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c 



SUBROUTINE a-update 



) 







COMPUTING STARTING POINT AND ENDING POINT OF 

SECOND DIMENSION TO BE SHARED BY EACH THREAD 

len=(nbase+numthrd-1)/numthrd

is2=(nothrd-1)*len+1,

ie2=min(nbase, nothrd*len), iof=nbase






a(1:iof, is2:ie2) =

a(1:iof, is2:ie2)-a(1:iof, iof+1:iof+iblk)
*a(iof+1:iof+iblk, is2:ie2)







c 



return 



) 



FIG. 2 1 
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c 



SUBROUTINE g-update 



) 







COMPUTING STARTING POINT AND ENDING POINT OF 
SECOND DIMENSION TO BE SHARED BY EACH THREAD 
len=(nbase+numthrd-1)/numthrd
is2=(nothrd-1)*len+1,
ie2=min(nbase, nothrd*len), iof=nbase






a(iof+1:n, is2:ie2)=a(iof+1:n, is2:ie2)
-a(iof+1:n, iof+1:iof+iblk)

*a(iof+1:iof+iblk, is2:ie2)







c 



return 



) 



FIG. 2 2 
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SUBROUTINE e-upciate 
ASSIGNING ARGUMENT SUCH THAT ELEMENT OF UPPER LEFT 
CORNER OF BLOCK E CAN BE FIRST ELEMENT OF S(k.*). 




s(1:i-1, i+1:iblk)=s(1:i-1, i+1:iblk)-s(1:i-1, i)*s(i, i+1:iblk)




i = i+1 





S87 



c 



return 



3 
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SUBROUTINE e2-update 
ASSIGNING ARGUMENT SUCH THAT ELEMENT OF UPPER LEFT 
CORNER OF BLOCK E CAN BE FIRST ELEMENT OF S(k.*). 




tmp=1.0/s(i, i)

s(i, 1:i-1)=tmp*s(i, 1:i-1)

s(i+1:iblk, 1:i-1)=s(i+1:iblk, 1:i-1)

-s(i, 1:i-1)*s(i+1:iblk, i)

s(i+1:iblk, i)=-a(i+1:iblk, i)*tmp

a(i, i)=tmp

i = i+1



S92 



c 



return 



:) 



FIG. 2 4 
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SUBROUTINE e3-update 
ASSIGNING ARGUMENT SUCH THAT ELEMENT OF UPPER LEFT 
CORNER OF BLOCK e CAN BE FIRST ELEMENT OF S(k.*). 



i=l 



S95 



S96 




s(1:i-1, 1:i-1)=s(1:i-1, 1:i-1)-s(1:i-1, i)
*s(i, 1:i-1)

s(1:i-1, i)=s(1:i-1, i)*s(i, i)
i = i+1



897 



c 



return 



FIG. 2 5 
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SUBROUTINE df -update 
(RECURSIVE PROGRAM) 
ASSIGNING is, ie TO STARTING POINT AND ENDING POINT 

INDICATING RANGE OF PROCESS BY EACH THREAD 
ASSIGNING ARGUMENT SUCH THAT ELEMENT OF UPPER LEFT 
CORNER OF BLOCK E CAN BE FIRST ELEMENT OF S(k.*) 
ASSIGNING FIRST DIMENSION LEADER i start AND BLOCK 
WIDTH I en OF BLOCK TO BE PROCESSED 




8102 




8103 




js=istart, je=i start-l + len-1 
a(js:je, is:ie)=a(js:je, is:ie)-
s(js:je, i)*a(istart+i-1, is:ie)



len1=len/2
len2=len-len1



len1=len/3
len2=len-len1



RECURSIVELY 
CALLING SUBROUTINE
df-update
(ASSIGNING BLOCK
LEADER istart AND
BLOCK WIDTH len1)



8106 



8107 



8108 



js=istart, je=istart+len1-1,
js2=js-nbase, je2=js2+len1-1
js3=js2+len1, je3=js3+len2-1,
js4=istart+len1, je4=js4+len2-1
a(js:je, is:ie)=a(js:je, is:ie)
-s(js2:je2, js3:je3)*a(js4:je4, is:ie)
istart2=istart+len1




FIG. 2 6 
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SUBROUTINE bh-update 
(RECURSIVE PROGRAM) 
ASSIGNING is. ie TO STARTING POINT AND ENDING 
POINT INDICATING RANGE OF PROCESS BY EACH THREAD 
ASSIGNING ARGUMENT SUCH THAT ELEMENT OF UPPER LEFT 
CORNER OF BLOCK E CAN BE FIRST ELEMENT OF S(k.*) 
ASSIGNING LEADER i start AND BLOCK WIDTH I en OF 
BLOCK TO BE PROCESSED 



8118 




j=istart+i-1, j2=j-nbase,
je=istart+len-1, je2=je-nbase

a(is:ie, j)=-a(is:ie, j)*s(j2, j2)
a(is:ie, j)=a(is:ie, j)-a(is:ie, j+1:je)
*s(j2+1:je2, j2)



len1=len/2
len2=len-len1



len1=len/3
len2=len-len1



S121 



RECURSIVELY 
CALLING SUBROUTINE 

bh-update 
(ASSIGNING BLOCK
LEADER istart2 AND
BLOCK WIDTH len1)



SI 22 



S123 



js=istart, je=istart+len1-1, js2=js-
nbase, je2=js2+len1-1
js3=istart+len1, je3=js2+len2-1, js4=js3-
nbase, je4=js4+len2-1
a(is:ie, js:je)=a(is:ie, js:je)-
a(is:ie, js3:je3)*s(js4:je4, js2:je2)
istart2=istart+len1



FIG. 2 7 
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Q SUBROUTINE exchgrow ^ 



TAKING barrier 


SYNCHRONIZATION 







COMPUTING STARTING POINT AND ENDING POINT 
SHARED BY EACH THREAD 
len=(n+numthrd-1)/numthrd
is=(nothrd-1)*len+1, ie=min(nbase, nothrd*len)
j=nbase+1




N 



SI 30 



SI 31 



EXCHANGING A(j, is:ie) FOR A(ip(j), is:ie)



SI 34 



j=j+1



SI 35 



TAKING barrier SYNCHRONIZATION 



SI 36 



c 



return 



3 
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Q SUBROUTINE exchgrow ^ 



TAKING barrier 


SYNCHRONIZATION 







COMPUTING START POINT AND ENDING POINT 
SHARED BY EACH THREAD 

len=(nbase+numthrd-1)/numthrd
is=(nothrd-1)*len+1, ie=min(nbase, nothrd*len)

j=n-1



SI 42 




SI 40 



S141 



EXCHANGING A(is:ie, j) FOR 
A(is: ie, ip(j)) 



SI 44 



SI 45 



TAKING barrier SYNCHRONIZATION 



SI 46 



c 



return 
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